import nltk
from nltk.corpus import wordnet as wn
from collections import defaultdict
import os, sys, time, re, json

# Make sure the WordNet corpus and the Open Multilingual WordNet data are
# available in NLTK's default data location. These calls run every time the
# module is executed or imported.
nltk.download('omw-1.4')
nltk.download('wordnet')

# PATHS
# NOTE: despite its name, PROJECT_ROOT is the directory containing this file;
# the data and output locations are resolved three levels above it.
PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
WORDNET_DIR = os.path.join(PROJECT_ROOT, '../../../data/wordnet')
WEIGHTED_DICT_PATH = os.path.join(PROJECT_ROOT, '../../../output/weighted_dictionary_es.json')

# First run only: create the project-local data directory and download the
# corpora into it as well.
if not os.path.exists(WORDNET_DIR):
    os.makedirs(WORDNET_DIR)
    nltk.download('wordnet', download_dir=WORDNET_DIR)
    nltk.download('omw-1.4', download_dir=WORDNET_DIR)

# Register the project-local directory with NLTK on EVERY run. Doing this only
# inside the branch above would leave the directory unregistered on all runs
# after the one that created it.
if WORDNET_DIR not in nltk.data.path:
    nltk.data.path.append(WORDNET_DIR)

# NOTE(review): these are private attributes set on the lazily loaded corpus
# reader; it is unclear whether NLTK reads any of them -- TODO confirm. The
# lookups in this file pass the language explicitly (lang='spa') and do not
# depend on them.
wn._lang = 'spa'
wn._dir = WORDNET_DIR
wn._omw_dir = WORDNET_DIR

# Function to get related words and assign weights
def get_related_words(seed_words, max_depth=2):
    """Build a weighted vocabulary by expanding seed words through WordNet.

    Every seed word gets weight 1. The vocabulary is then grown breadth-first:
    at each level, every word discovered at the previous level is looked up
    with ``wn.synsets(word, lang='spa')`` and all Spanish lemma names of the
    resulting synsets are collected. Words first reached at level ``k``
    receive weight ``1 / 2**k``.

    All seeds are expanded together, level by level, so a word's weight
    reflects its distance to the *closest* seed and does not depend on the
    order in which the seeds are listed.

    Args:
        seed_words: Iterable of Spanish words to start from. Duplicates are
            harmless.
        max_depth: Number of expansion levels. ``0`` (or a negative value)
            returns only the seeds.

    Returns:
        A ``defaultdict(int)`` mapping each word to its weight (``1`` for
        seeds, a float such as ``0.5`` or ``0.25`` for related words).
    """
    weighted_dict = defaultdict(int)

    # Seeds always carry the maximum weight.
    for seed_word in seed_words:
        weighted_dict[seed_word] = 1

    # Unique seeds in first-seen order; lists (not sets) keep the insertion
    # order of the result reproducible from run to run.
    frontier = list(weighted_dict)
    current_weight = 1

    for _ in range(max_depth):
        if not frontier:
            break  # nothing new was discovered at the previous level

        current_weight /= 2  # halve the weight at each level of depth
        discovered = []

        for word in frontier:
            for synset in wn.synsets(word, lang='spa'):
                for lemma in synset.lemmas('spa'):
                    lemma_name = lemma.name()
                    # The first discovery wins: levels are visited in
                    # increasing distance, so this is the highest weight the
                    # word can get.
                    if lemma_name not in weighted_dict:
                        weighted_dict[lemma_name] = current_weight
                        discovered.append(lemma_name)

        frontier = discovered

    return weighted_dict

# Seed words in Spanish (violence- and crime-related vocabulary)
seed_words = [
    "violencia",       # violence
    "asesinato",       # murder
    "homicidio",       # homicide
    "tiroteo",         # shooting
    "ataque",          # attack
    "enfrentamiento",  # clash / confrontation
    "balacera",        # shootout
    "secuestro",       # kidnapping
    "narcotráfico",    # drug trafficking
    "delincuencia",    # crime / delinquency
]

# Construct the weighted dictionary
weighted_dict = get_related_words(seed_words)

# Display the weighted dictionary
for word, weight in weighted_dict.items():
    print(f"{word}: {weight}")

# Save the dictionary to a file. The output directory is created first so the
# write does not fail with FileNotFoundError on a fresh checkout.
os.makedirs(os.path.dirname(WEIGHTED_DICT_PATH), exist_ok=True)
with open(WEIGHTED_DICT_PATH, 'w', encoding='utf-8') as f:
    # ensure_ascii=False keeps accented characters readable in the JSON file.
    json.dump(weighted_dict, f, ensure_ascii=False, indent=4)
